December 01, 2020
library(tidyverse)
library(sf)
library(tigris)
library(leaflet)
#PGE Electric
pge_17_q1_elec <- read_csv("PGE_2017_Q1_ElectricUsageByZip.csv")
pge_17_q2_elec <- read_csv("PGE_2017_Q2_ElectricUsageByZip.csv")
pge_17_q3_elec <- read_csv("PGE_2017_Q3_ElectricUsageByZip.csv")
pge_17_q4_elec <- read_csv("PGE_2017_Q4_ElectricUsageByZip.csv")
pge_18_q1_elec <- read_csv("PGE_2018_Q1_ElectricUsageByZip.csv")
pge_18_q2_elec <- read_csv("PGE_2018_Q2_ElectricUsageByZip.csv")
pge_18_q3_elec <- read_csv("PGE_2018_Q3_ElectricUsageByZip.csv")
pge_18_q4_elec <- read_csv("PGE_2018_Q4_ElectricUsageByZip.csv")
pge_19_q1_elec <- read_csv("PGE_2019_Q1_ElectricUsageByZip.csv")
pge_19_q2_elec <- read_csv("PGE_2019_Q2_ElectricUsageByZip.csv")
pge_19_q3_elec <- read_csv("PGE_2019_Q3_ElectricUsageByZip.csv")
pge_19_q4_elec <- read_csv("PGE_2019_Q4_ElectricUsageByZip.csv")
pge_20_q1_elec <- read_csv("PGE_2020_Q1_ElectricUsageByZip.csv")
pge_20_q2_elec <- read_csv("PGE_2020_Q2_ElectricUsageByZip.csv")
pge_20_q3_elec <- read_csv("PGE_2020_Q3_ElectricUsageByZip.csv")
pge_17_q1_elec$kBTU = pge_17_q1_elec$TOTALKWH * 3.412
pge_17_q2_elec$kBTU = pge_17_q2_elec$TOTALKWH * 3.412
pge_17_q3_elec$kBTU = pge_17_q3_elec$TOTALKWH * 3.412
pge_17_q4_elec$kBTU = pge_17_q4_elec$TOTALKWH * 3.412
pge_18_q1_elec$kBTU = pge_18_q1_elec$TOTALKWH * 3.412
pge_18_q2_elec$kBTU = pge_18_q2_elec$TOTALKWH * 3.412
pge_18_q3_elec$kBTU = pge_18_q3_elec$TOTALKWH * 3.412
pge_18_q4_elec$kBTU = pge_18_q4_elec$TOTALKWH * 3.412
pge_19_q1_elec$kBTU = pge_19_q1_elec$TOTALKWH * 3.412
pge_19_q2_elec$kBTU = pge_19_q2_elec$TOTALKWH * 3.412
pge_19_q3_elec$kBTU = pge_19_q3_elec$TOTALKWH * 3.412
pge_19_q4_elec$kBTU = pge_19_q4_elec$TOTALKWH * 3.412
pge_20_q1_elec$kBTU = pge_20_q1_elec$TOTALKWH * 3.412
pge_20_q2_elec$kBTU = pge_20_q2_elec$TOTALKWH * 3.412
pge_20_q3_elec$kBTU = pge_20_q3_elec$TOTALKWH * 3.412
pge_17_q1_elec[7:8] <- list(NULL)
pge_17_q2_elec[7:8] <- list(NULL)
pge_17_q3_elec[7:8] <- list(NULL)
pge_17_q4_elec[7:8] <- list(NULL)
pge_18_q1_elec[7:8] <- list(NULL)
pge_18_q2_elec[7:8] <- list(NULL)
pge_18_q3_elec[7:8] <- list(NULL)
pge_18_q4_elec[7:8] <- list(NULL)
pge_19_q1_elec[7:8] <- list(NULL)
pge_19_q2_elec[7:8] <- list(NULL)
pge_19_q3_elec[7:8] <- list(NULL)
pge_19_q4_elec[7:8] <- list(NULL)
pge_20_q1_elec[7:8] <- list(NULL)
pge_20_q2_elec[7:8] <- list(NULL)
pge_20_q3_elec[7:8] <- list(NULL)
write_csv(pge_17_q1_elec,"PGE_2017_Q1_ElectricUsageByZipv2.csv")
write_csv(pge_17_q2_elec,"PGE_2017_Q2_ElectricUsageByZipv2.csv")
write_csv(pge_17_q3_elec,"PGE_2017_Q3_ElectricUsageByZipv2.csv")
write_csv(pge_17_q4_elec,"PGE_2017_Q4_ElectricUsageByZipv2.csv")
write_csv(pge_18_q1_elec,"PGE_2018_Q1_ElectricUsageByZipv2.csv")
write_csv(pge_18_q2_elec,"PGE_2018_Q2_ElectricUsageByZipv2.csv")
write_csv(pge_18_q3_elec,"PGE_2018_Q3_ElectricUsageByZipv2.csv")
write_csv(pge_18_q4_elec,"PGE_2018_Q4_ElectricUsageByZipv2.csv")
write_csv(pge_19_q1_elec,"PGE_2019_Q1_ElectricUsageByZipv2.csv")
write_csv(pge_19_q2_elec,"PGE_2019_Q2_ElectricUsageByZipv2.csv")
write_csv(pge_19_q3_elec,"PGE_2019_Q3_ElectricUsageByZipv2.csv")
write_csv(pge_19_q4_elec,"PGE_2019_Q4_ElectricUsageByZipv2.csv")
write_csv(pge_20_q1_elec,"PGE_2020_Q1_ElectricUsageByZipv2.csv")
write_csv(pge_20_q2_elec,"PGE_2020_Q2_ElectricUsageByZipv2.csv")
write_csv(pge_20_q3_elec,"PGE_2020_Q3_ElectricUsageByZipv2.csv")
#PGE Gas
pge_17_q1_gas <- read_csv("PGE_2017_Q1_GasUsageByZip.csv")
pge_17_q2_gas <- read_csv("PGE_2017_Q2_GasUsageByZip.csv")
pge_17_q3_gas <- read_csv("PGE_2017_Q3_GasUsageByZip.csv")
pge_17_q4_gas <- read_csv("PGE_2017_Q4_GasUsageByZip.csv")
pge_18_q1_gas <- read_csv("PGE_2018_Q1_GasUsageByZip.csv")
pge_18_q2_gas <- read_csv("PGE_2018_Q2_GasUsageByZip.csv")
pge_18_q3_gas <- read_csv("PGE_2018_Q3_GasUsageByZip.csv")
pge_18_q4_gas <- read_csv("PGE_2018_Q4_GasUsageByZip.csv")
pge_19_q1_gas <- read_csv("PGE_2019_Q1_GasUsageByZip.csv")
pge_19_q2_gas <- read_csv("PGE_2019_Q2_GasUsageByZip.csv")
pge_19_q3_gas <- read_csv("PGE_2019_Q3_GasUsageByZip.csv")
pge_19_q4_gas <- read_csv("PGE_2019_Q4_GasUsageByZip.csv")
pge_20_q1_gas <- read_csv("PGE_2020_Q1_GasUsageByZip.csv")
pge_20_q2_gas <- read_csv("PGE_2020_Q2_GasUsageByZip.csv")
pge_20_q3_gas <- read_csv("PGE_2020_Q3_GasUsageByZip.csv")
pge_17_q1_gas$kBTU = pge_17_q1_gas$TOTALTHM * 100
pge_17_q2_gas$kBTU = pge_17_q2_gas$TOTALTHM * 100
pge_17_q3_gas$kBTU = pge_17_q3_gas$TOTALTHM * 100
pge_17_q4_gas$kBTU = pge_17_q4_gas$TOTALTHM * 100
pge_18_q1_gas$kBTU = pge_18_q1_gas$TOTALTHM * 100
pge_18_q2_gas$kBTU = pge_18_q2_gas$TOTALTHM * 100
pge_18_q3_gas$kBTU = pge_18_q3_gas$TOTALTHM * 100
pge_18_q4_gas$kBTU = pge_18_q4_gas$TOTALTHM * 100
pge_19_q1_gas$kBTU = pge_19_q1_gas$TOTALTHM * 100
pge_19_q2_gas$kBTU = pge_19_q2_gas$TOTALTHM * 100
pge_19_q3_gas$kBTU = pge_19_q3_gas$TOTALTHM * 100
pge_19_q4_gas$kBTU = pge_19_q4_gas$TOTALTHM * 100
pge_20_q1_gas$kBTU = pge_20_q1_gas$TOTALTHM * 100
pge_20_q2_gas$kBTU = pge_20_q2_gas$TOTALTHM * 100
pge_20_q3_gas$kBTU = pge_20_q3_gas$TOTALTHM * 100
pge_17_q1_gas[7:8] <- list(NULL)
pge_17_q2_gas[7:8] <- list(NULL)
pge_17_q3_gas[7:8] <- list(NULL)
pge_17_q4_gas[7:8] <- list(NULL)
pge_18_q1_gas[7:8] <- list(NULL)
pge_18_q2_gas[7:8] <- list(NULL)
pge_18_q3_gas[7:8] <- list(NULL)
pge_18_q4_gas[7:8] <- list(NULL)
pge_19_q1_gas[7:8] <- list(NULL)
pge_19_q2_gas[7:8] <- list(NULL)
pge_19_q3_gas[7:8] <- list(NULL)
pge_19_q4_gas[7:8] <- list(NULL)
pge_20_q1_gas[7:8] <- list(NULL)
pge_20_q2_gas[7:8] <- list(NULL)
pge_20_q3_gas[7:8] <- list(NULL)
write_csv(pge_17_q1_gas,"PGE_2017_Q1_GasUsageByZipv2.csv")
write_csv(pge_17_q2_gas,"PGE_2017_Q2_GasUsageByZipv2.csv")
write_csv(pge_17_q3_gas,"PGE_2017_Q3_GasUsageByZipv2.csv")
write_csv(pge_17_q4_gas,"PGE_2017_Q4_GasUsageByZipv2.csv")
write_csv(pge_18_q1_gas,"PGE_2018_Q1_GasUsageByZipv2.csv")
write_csv(pge_18_q2_gas,"PGE_2018_Q2_GasUsageByZipv2.csv")
write_csv(pge_18_q3_gas,"PGE_2018_Q3_GasUsageByZipv2.csv")
write_csv(pge_18_q4_gas,"PGE_2018_Q4_GasUsageByZipv2.csv")
write_csv(pge_19_q1_gas,"PGE_2019_Q1_GasUsageByZipv2.csv")
write_csv(pge_19_q2_gas,"PGE_2019_Q2_GasUsageByZipv2.csv")
write_csv(pge_19_q3_gas,"PGE_2019_Q3_GasUsageByZipv2.csv")
write_csv(pge_19_q4_gas,"PGE_2019_Q4_GasUsageByZipv2.csv")
write_csv(pge_20_q1_gas,"PGE_2020_Q1_GasUsageByZipv2.csv")
write_csv(pge_20_q2_gas,"PGE_2020_Q2_GasUsageByZipv2.csv")
write_csv(pge_20_q3_gas,"PGE_2020_Q3_GasUsageByZipv2.csv")
# Loops
library(tidyverse)
years <- 2017:2020
quarters <- 1:4
types <- c("Electric", "Gas")
pge_elec_gas <- NULL
for(year in years) {
for(quarter in quarters){
for(type in types) {
filename <-
paste0(
"PGE_",
year,
"_q",
quarter,
"_",
type,
"UsageByZipv2.csv"
)
if(quarter %in% 3:4 & year %in% 2020){
next
}
print(filename)
temp <- read_csv(filename)
pge_elec_gas <- rbind(pge_elec_gas,temp)
# Note rbind requires field names to be consistent for every new thing that you add.
saveRDS(pge_elec_gas, "pge_elec_gas.rds")
}}}
## [1] "PGE_2017_q1_ElectricUsageByZipv2.csv"
## [1] "PGE_2017_q1_GasUsageByZipv2.csv"
## [1] "PGE_2017_q2_ElectricUsageByZipv2.csv"
## [1] "PGE_2017_q2_GasUsageByZipv2.csv"
## [1] "PGE_2017_q3_ElectricUsageByZipv2.csv"
## [1] "PGE_2017_q3_GasUsageByZipv2.csv"
## [1] "PGE_2017_q4_ElectricUsageByZipv2.csv"
## [1] "PGE_2017_q4_GasUsageByZipv2.csv"
## [1] "PGE_2018_q1_ElectricUsageByZipv2.csv"
## [1] "PGE_2018_q1_GasUsageByZipv2.csv"
## [1] "PGE_2018_q2_ElectricUsageByZipv2.csv"
## [1] "PGE_2018_q2_GasUsageByZipv2.csv"
## [1] "PGE_2018_q3_ElectricUsageByZipv2.csv"
## [1] "PGE_2018_q3_GasUsageByZipv2.csv"
## [1] "PGE_2018_q4_ElectricUsageByZipv2.csv"
## [1] "PGE_2018_q4_GasUsageByZipv2.csv"
## [1] "PGE_2019_q1_ElectricUsageByZipv2.csv"
## [1] "PGE_2019_q1_GasUsageByZipv2.csv"
## [1] "PGE_2019_q2_ElectricUsageByZipv2.csv"
## [1] "PGE_2019_q2_GasUsageByZipv2.csv"
## [1] "PGE_2019_q3_ElectricUsageByZipv2.csv"
## [1] "PGE_2019_q3_GasUsageByZipv2.csv"
## [1] "PGE_2019_q4_ElectricUsageByZipv2.csv"
## [1] "PGE_2019_q4_GasUsageByZipv2.csv"
## [1] "PGE_2020_q1_ElectricUsageByZipv2.csv"
## [1] "PGE_2020_q1_GasUsageByZipv2.csv"
## [1] "PGE_2020_q2_ElectricUsageByZipv2.csv"
## [1] "PGE_2020_q2_GasUsageByZipv2.csv"
#cleaning big dataset
#down to just commercial and res
pge_filter <- filter(
pge_elec_gas,
CUSTOMERCLASS %in%
c(
"Elec- Residential",
"Elec- Commercial",
"Gas- Residential",
"Gas- Commercial"
)
)
names(pge_filter)
## [1] "ZIPCODE" "MONTH" "YEAR" "CUSTOMERCLASS"
## [5] "COMBINED" "TOTALCUSTOMERS" "kBTU"
head(pge_filter)
## # A tibble: 6 x 7
## ZIPCODE MONTH YEAR CUSTOMERCLASS COMBINED TOTALCUSTOMERS kBTU
## <dbl> <dbl> <dbl> <chr> <chr> <dbl> <dbl>
## 1 93101 1 2017 Elec- Commercial Y 0 0
## 2 93101 2 2017 Elec- Commercial Y 0 0
## 3 93101 3 2017 Elec- Commercial Y 0 0
## 4 93105 1 2017 Elec- Commercial Y 0 0
## 5 93105 2 2017 Elec- Commercial Y 0 0
## 6 93105 3 2017 Elec- Commercial Y 0 0
pge_select <-
select(
pge_filter,
-COMBINED
)
# Remove zips from outside 9 counties
ca_counties <- counties("CA", cb = T, progress_bar = F)
st_crs(ca_counties)
## Coordinate Reference System:
## User input: 4269
## wkt:
## GEOGCS["NAD83",
## DATUM["North_American_Datum_1983",
## SPHEROID["GRS 1980",6378137,298.257222101,
## AUTHORITY["EPSG","7019"]],
## TOWGS84[0,0,0,0,0,0,0],
## AUTHORITY["EPSG","6269"]],
## PRIMEM["Greenwich",0,
## AUTHORITY["EPSG","8901"]],
## UNIT["degree",0.0174532925199433,
## AUTHORITY["EPSG","9122"]],
## AUTHORITY["EPSG","4269"]]
projection <- "+proj=utm +zone=10 +ellps=GRS80 +datum=NAD83 +units=ft +no_defs"
ca_counties_transformed <-
ca_counties %>%
st_transform(4326) %>%
st_transform(26910) %>%
st_transform(projection) %>%
st_transform(st_crs(ca_counties))
## filter to just bay zips
bay_county_names <-
c(
"Alameda",
"Contra Costa",
"Marin",
"Napa",
"San Francisco",
"San Mateo",
"Santa Clara",
"Solano",
"Sonoma"
)
bay_counties <-
counties("CA", cb = T, progress_bar = F) %>%
filter(NAME %in% bay_county_names)
ggplot(bay_counties) + geom_sf()
ca_cities <- places("CA", cb = T, progress_bar = FALSE)
bay_cities <- ca_cities[bay_counties, ]
usa_zips <-
zctas(cb = T, progress_bar = F)
bay_zips <-
usa_zips %>%
st_centroid() %>%
.[bay_counties, ] %>%
st_set_geometry(NULL) %>%
left_join(usa_zips %>% select(GEOID10)) %>%
st_as_sf()
pge_byzip <-
pge_select %>%
mutate(
ZIPCODE = ZIPCODE %>% as.character()
) %>%
group_by(ZIPCODE,MONTH,YEAR,CUSTOMERCLASS) %>%
summarize(
kBTU = sum(kBTU, na.rm = T)
) %>%
right_join(
bay_zips %>% select(GEOID10),
by = c("ZIPCODE" = "GEOID10")
) %>%
st_as_sf() %>%
st_transform(4326)
pge_byzip2 <-
pge_byzip %>%
st_set_geometry(NULL)
#Rename dates
library(zoo)
pge_byzip2$DATE <- as.yearmon(paste(pge_byzip2$YEAR, pge_byzip2$MONTH), "%Y %m")
pge_bydate <-
select(
pge_byzip2,
DATE, CUSTOMERCLASS, kBTU
)
#group
pge_final <-
group_by(
pge_bydate,
DATE,
CUSTOMERCLASS
)
#summarize by date
pge_summarize <-
summarize(
pge_final,
kBTU =
sum(
kBTU,
na.rm = TRUE
)
)
str(pge_summarize)
## tibble [169 × 3] (S3: grouped_df/tbl_df/tbl/data.frame)
## $ DATE : 'yearmon' num [1:169] Jan 2017 Jan 2017 Jan 2017 Jan 2017 ...
## $ CUSTOMERCLASS: chr [1:169] "Elec- Commercial" "Elec- Residential" "Gas- Commercial" "Gas- Residential" ...
## $ kBTU : num [1:169] 5.63e+09 4.80e+09 4.60e+09 1.79e+10 4.69e+09 ...
## - attr(*, "groups")= tibble [43 × 2] (S3: tbl_df/tbl/data.frame)
## ..$ DATE : 'yearmon' num [1:43] Jan 2017 Feb 2017 Mar 2017 Apr 2017 ...
## ..$ .rows: list<int> [1:43]
## .. ..$ : int [1:4] 1 2 3 4
## .. ..$ : int [1:4] 5 6 7 8
## .. ..$ : int [1:4] 9 10 11 12
## .. ..$ : int [1:4] 13 14 15 16
## .. ..$ : int [1:4] 17 18 19 20
## .. ..$ : int [1:4] 21 22 23 24
## .. ..$ : int [1:4] 25 26 27 28
## .. ..$ : int [1:4] 29 30 31 32
## .. ..$ : int [1:4] 33 34 35 36
## .. ..$ : int [1:4] 37 38 39 40
## .. ..$ : int [1:4] 41 42 43 44
## .. ..$ : int [1:4] 45 46 47 48
## .. ..$ : int [1:4] 49 50 51 52
## .. ..$ : int [1:4] 53 54 55 56
## .. ..$ : int [1:4] 57 58 59 60
## .. ..$ : int [1:4] 61 62 63 64
## .. ..$ : int [1:4] 65 66 67 68
## .. ..$ : int [1:4] 69 70 71 72
## .. ..$ : int [1:4] 73 74 75 76
## .. ..$ : int [1:4] 77 78 79 80
## .. ..$ : int [1:4] 81 82 83 84
## .. ..$ : int [1:4] 85 86 87 88
## .. ..$ : int [1:4] 89 90 91 92
## .. ..$ : int [1:4] 93 94 95 96
## .. ..$ : int [1:4] 97 98 99 100
## .. ..$ : int [1:4] 101 102 103 104
## .. ..$ : int [1:4] 105 106 107 108
## .. ..$ : int [1:4] 109 110 111 112
## .. ..$ : int [1:4] 113 114 115 116
## .. ..$ : int [1:4] 117 118 119 120
## .. ..$ : int [1:4] 121 122 123 124
## .. ..$ : int [1:4] 125 126 127 128
## .. ..$ : int [1:4] 129 130 131 132
## .. ..$ : int [1:4] 133 134 135 136
## .. ..$ : int [1:4] 137 138 139 140
## .. ..$ : int [1:4] 141 142 143 144
## .. ..$ : int [1:4] 145 146 147 148
## .. ..$ : int [1:4] 149 150 151 152
## .. ..$ : int [1:4] 153 154 155 156
## .. ..$ : int [1:4] 157 158 159 160
## .. ..$ : int [1:4] 161 162 163 164
## .. ..$ : int [1:4] 165 166 167 168
## .. ..$ : int 169
## .. ..@ ptype: int(0)
## ..- attr(*, ".drop")= logi TRUE
summary(pge_summarize)
## DATE CUSTOMERCLASS kBTU
## Min. :2017 Length:169 Min. :0.000e+00
## 1st Qu.:2018 Class :character 1st Qu.:3.659e+09
## Median :2019 Mode :character Median :4.513e+09
## Mean :2019 Mean :5.262e+09
## 3rd Qu.:2020 3rd Qu.:5.258e+09
## Max. :2020 Max. :1.795e+10
## NA's :1
str(pge_summarize$CUSTOMERCLASS)
## chr [1:169] "Elec- Commercial" "Elec- Residential" "Gas- Commercial" ...
summary(pge_summarize$CUSTOMERCLASS)
## Length Class Mode
## 169 character character
pge_wide <-
pivot_wider(
pge_summarize,
names_from = CUSTOMERCLASS,
values_from = kBTU
)
pge_tidy <-
pivot_longer(
pge_wide,
c("Elec- Commercial", "Elec- Residential", "Gas- Commercial", "Gas- Residential"),
names_to = "CUSTOMERCLASS",
values_to = "kBTU"
)
pge_tidy2 <-
select(pge_tidy,
DATE, CUSTOMERCLASS, kBTU)
The following bar chart shows monthly total kBTUs of residential and commercial electricity and gas consumption for the Bay Area from 2017 to 2020.
## Plots
library(tidyverse)
library(plotly)
pge_chart <-
pge_summarize %>%
ggplot() +
geom_bar(
aes(
x = DATE %>% factor(),
y = kBTU,
fill = CUSTOMERCLASS
),
stat = "identity",
position = "stack"
) +
labs(
x = "Date",
y = "kBTU",
title = "PG&E Territory Monthly Electricity Usage",
fill = "Electricity Type"
) +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)
)
pge_chart %>%
ggplotly() %>%
layout(
xaxis = list(fixedrange = T),
yaxis = list(fixedrange = T)
) %>%
config(displayModeBar = F)
Overall, residential gas use looks a bit higher in April 2020 than in April of years past, which could have been a result of the COVID-19 pandemic. However, energy use seemed to have returned to normal later on in 2020. This could have been a result of more people spending time at home (residential) rather than at work or in restaurants, at the gym, or in other commercial locations.
It is also interesting to note a potential data error in September of 2017. Electric - Residential and Electric - Commercial usage seem much higher than in the following years. For Electric - Residential, this is largely a result of zip code 93308, which seems to have a potential error in the data, as following years have much lower usage.
#remove NA and adjust for September
## Mapping, compare 2019 Q2 and 2020 Q2
pge_final_2019only <- filter(
pge_final,
YEAR %in%
c("2019"
)
)
pge_final_2019only$kBTU2019 <- pge_final_2019only$kBTU
# narrowing to Q2 and percent change
Q2_2019 <- filter(
pge_final_2019only,
MONTH %in%
c("4","5","6"),
CUSTOMERCLASS %in%
c("Elec- Commercial", "Elec- Residential")
)
# sum all months first
#group
Q2_2019_group <-
group_by(
Q2_2019,
ZIPCODE,
)
#summarize by date
Q2_2019_summarize <-
summarize(
Q2_2019_group,
kBTU2019 =
sum(
kBTU2019,
na.rm = TRUE
)
)
pge_final_2020only <- filter(
pge_final,
YEAR %in%
c("2020"
)
)
pge_final_2020only$kBTU2020 <- pge_final_2020only$kBTU
Q2_2020 <- filter(
pge_final_2020only,
MONTH %in%
c("4","5","6"),
CUSTOMERCLASS %in%
c("Elec- Commercial", "Elec- Residential")
)
#group
Q2_2020_group <-
group_by(
Q2_2020,
ZIPCODE,
)
#summarize by date
Q2_2020_summarize <-
summarize(
Q2_2020_group,
kBTU2020 =
sum(
kBTU2020,
na.rm = TRUE
)
)
# comparing 2019 and 2020
compare2019_2020 <- merge(Q2_2019_summarize, Q2_2020_summarize, by =c("ZIPCODE"))
view(compare2019_2020)
#then add percent change
compare2019_2020$percentchange <- ((compare2019_2020$kBTU2020 / compare2019_2020$kBTU2019)-1)*100
finalQ2 <- na.omit(compare2019_2020)
view(finalQ2)
## Geospatial Data from book
library(tidyverse)
library(sf)
library(tigris)
library(leaflet)
ca_counties <- counties("CA", cb = T, progress_bar = F)
st_crs(ca_counties)
## Coordinate Reference System:
## User input: 4269
## wkt:
## GEOGCS["NAD83",
## DATUM["North_American_Datum_1983",
## SPHEROID["GRS 1980",6378137,298.257222101,
## AUTHORITY["EPSG","7019"]],
## TOWGS84[0,0,0,0,0,0,0],
## AUTHORITY["EPSG","6269"]],
## PRIMEM["Greenwich",0,
## AUTHORITY["EPSG","8901"]],
## UNIT["degree",0.0174532925199433,
## AUTHORITY["EPSG","9122"]],
## AUTHORITY["EPSG","4269"]]
projection <- "+proj=utm +zone=10 +ellps=GRS80 +datum=NAD83 +units=ft +no_defs"
ca_counties_transformed <-
ca_counties %>%
st_transform(4326) %>%
st_transform(26910) %>%
st_transform(projection) %>%
st_transform(st_crs(ca_counties))
ggplot(ca_counties) + geom_sf()
leaflet() %>%
addTiles() %>%
addPolygons(
data = ca_counties %>%
st_transform(4326)
) %>%
addMarkers(
data = ca_counties %>%
st_centroid() %>%
st_transform(4326)
)
bay_county_names <-
c(
"Alameda",
"Contra Costa",
"Marin",
"Napa",
"San Francisco",
"San Mateo",
"Santa Clara",
"Solano",
"Sonoma"
)
bay_counties <-
ca_counties %>%
filter(NAME %in% bay_county_names)
ggplot(bay_counties) + geom_sf()
ca_cities <- places("CA", cb = T, progress_bar = FALSE)
bay_cities <- ca_cities[bay_counties, ]
bay_cities_within <-
ca_cities %>%
st_centroid() %>%
.[bay_counties, ] %>%
st_set_geometry(NULL) %>%
left_join(ca_cities %>% select(GEOID)) %>%
st_as_sf()
bay_cities_within <-
ca_cities[which(ca_cities$GEOID %in% st_centroid(ca_cities)[bay_counties, ]$GEOID), ]
leaflet() %>%
addTiles() %>%
addPolygons(
data = bay_counties %>%
st_transform(4326),
fill = F,
weight = 2
) %>%
addPolygons(
data = bay_cities %>%
filter(!GEOID %in% bay_cities_within$GEOID) %>%
st_transform(4326),
color = "red"
) %>%
addPolygons(
data = bay_cities_within %>%
st_transform(4326),
color = "green"
)
bay_cbgs <- block_groups("CA", bay_county_names[1:9], cb = T, progress_bar = F)
bay_cbgs <-
bay_county_names %>%
map_dfr(function(county) {
block_groups("CA", county, cb = T, progress_bar = F)
})
bay_cbgs_clip <- st_read("https://opendata.arcgis.com/datasets/037fc1597b5a4c6994b89c46a8fb4f06_0.geojson")
## Reading layer `f24c24a2-d217-45fe-91b9-2abc9631985e202044-1-3dkk4a.hgijj' from data source `https://opendata.arcgis.com/datasets/037fc1597b5a4c6994b89c46a8fb4f06_0.geojson' using driver `GeoJSON'
## Simple feature collection with 4751 features and 9 fields
## geometry type: MULTIPOLYGON
## dimension: XY
## bbox: xmin: -123.5337 ymin: 36.89298 xmax: -121.2082 ymax: 38.86424
## CRS: 4326
ggplot(bay_cbgs_clip)+geom_sf()
usa_zips <-
zctas(cb = T, progress_bar = F)
bay_zips <-
usa_zips %>%
st_centroid() %>%
.[bay_counties, ] %>%
st_set_geometry(NULL) %>%
left_join(usa_zips %>% select(GEOID10)) %>%
st_as_sf()
The following map highlights which neighborhoods experienced the greatest change in electricity consumption before and after the pandemic began by comparing Q2 commercial and residential electricity (not gas) in 2019 and 2020. Many neighborhoods did have higher electricity use in 2020, though there were only a few zip codes with increases of 50% or higher. There were also some zip codes with less electricity use following the pandemic.
As this map combines commercial and residential data, it is possible that we are ignorning shifts from commercial to residential electricty use, which may be expected following the Monthly Electricity Usage plot above. Additionally, there are a number of neighborhoods where percent change from 2019 to 2020 could not be calculated (NA). These areas cover a decent amount of the Bay Area, meaning this map could underestimate how much electricity was being used overall. Lastly, because gas data is not included, this is not giving us a holistic picture of overall energy use.
#where this needs to change to show percent change, etc
finalQ2map <-
finalQ2 %>%
right_join(
bay_zips %>% select(GEOID10),
by = c("ZIPCODE" = "GEOID10")
) %>%
st_as_sf() %>%
st_transform(4326)
res_pal <- colorNumeric(
palette = "Blues",
domain =
finalQ2$percentchange
)
mymap<- leaflet() %>%
addTiles() %>%
addPolygons(
data = finalQ2map,
fillColor = ~res_pal(percentchange),
color = "white",
opacity = 0.5,
fillOpacity = 0.5,
weight = 1,
label = ~paste0(
round(percentchange),
" percent change total in ",
ZIPCODE
),
highlightOptions = highlightOptions(
weight = 2,
opacity = 1
)
) %>%
addLegend(
data = finalQ2map,
pal = res_pal,
values = ~percentchange,
title = "Percent Change Electricity Use, Q2 2019-2020"
)
mymap